#delim cr
set more off
set varabbrev off
pause on
graph set ps logo off

capture log close
set linesize 200
set logtype text
log using ../log/create-imputed-pollution-measures.log , replace

/* --------------------------------------

Take the environmental data we have
and impute pollution measures.

--------------------------------------- */

clear all
estimates clear
set mem 50m

#delimit;

use ../dta/all-our-environmental-data, clear;

***impute PM25 from PM10 & visibility***;

***create PM25 in ug/m3***;
gen i_l = 0 if twitter_aqi >= 0 & twitter_aqi <= 50;
gen i_h = 50 if twitter_aqi >= 0 & twitter_aqi <= 50;
gen b_l = 0 if twitter_aqi >= 0 & twitter_aqi <= 50;
gen b_h = 15.4 if twitter_aqi >= 0 & twitter_aqi <= 50;
replace i_l = 51 if twitter_aqi > 50 & twitter_aqi <= 100;
replace i_h = 100 if twitter_aqi > 50 & twitter_aqi <= 100;
replace b_l = 15.5 if twitter_aqi > 50 & twitter_aqi <= 100;
replace b_h = 40.4 if twitter_aqi > 50 & twitter_aqi <= 100;
replace i_l = 101 if twitter_aqi > 100 & twitter_aqi <= 150;
replace i_h = 150 if twitter_aqi > 100 & twitter_aqi <= 150;
replace b_l = 40.5 if twitter_aqi > 100 & twitter_aqi <= 150;
replace b_h = 65.4 if twitter_aqi > 100 & twitter_aqi <= 150;
replace i_l = 151 if twitter_aqi > 150 & twitter_aqi <= 200;
replace i_h = 200 if twitter_aqi > 150 & twitter_aqi <= 200;
replace b_l = 65.5 if twitter_aqi > 150 & twitter_aqi <= 200;
replace b_h = 150.4 if twitter_aqi > 150 & twitter_aqi <= 200;
replace i_l = 201 if twitter_aqi > 200 & twitter_aqi <= 300;
replace i_h = 300 if twitter_aqi > 200 & twitter_aqi <= 300;
replace b_l = 150.5 if twitter_aqi > 200 & twitter_aqi <= 300;
replace b_h = 250.4 if twitter_aqi > 200 & twitter_aqi <= 300;
replace i_l = 301 if twitter_aqi > 300 & twitter_aqi <= 400;
replace i_h = 400 if twitter_aqi > 300 & twitter_aqi <= 400;
replace b_l = 250.5 if twitter_aqi > 300 & twitter_aqi <= 400;
replace b_h = 350.4 if twitter_aqi > 300 & twitter_aqi <= 400;
replace i_l = 401 if twitter_aqi > 400 & twitter_aqi <= 500;
replace i_h = 500 if twitter_aqi > 400 & twitter_aqi <= 500;
replace b_l = 350.5 if twitter_aqi > 400 & twitter_aqi <= 500;
replace b_h = 500.4 if twitter_aqi > 400 & twitter_aqi <= 500;
gen pm25 = (twitter_aqi - i_l)*((b_h - b_l)/(i_h - i_l)) + b_l;
label var pm25 "PM25 in ug/m3 (embassy data, converted to concentration)";
drop i_l i_h b_l b_h;

***check***;
recode twitter_aqi (0/50=1) (51/100=2) (101/150=3) (151/200=4) (201/300=5) (301/400=6) (401/500=7), gen(aqi);
table aqi, c(min pm25 max pm25 count pm25);

***create PM10 in ug/m3***;
gen i_l = 0 if junjie_api_xx >= 0 & junjie_api_xx <= 50;
gen i_h = 50 if junjie_api_xx >= 0 & junjie_api_xx <= 50;
gen b_l = 0 if junjie_api_xx >= 0 & junjie_api_xx <= 50;
gen b_h = .05 if junjie_api_xx >= 0 & junjie_api_xx <= 50;
replace i_l = 51 if junjie_api_xx > 50 & junjie_api_xx <= 100;
replace i_h = 100 if junjie_api_xx > 50 & junjie_api_xx <= 100;
replace b_l = .051 if junjie_api_xx > 50 & junjie_api_xx <= 100;
replace b_h = .15 if junjie_api_xx > 50 & junjie_api_xx <= 100;
replace i_l = 101 if junjie_api_xx > 100 & junjie_api_xx <= 150;
replace i_h = 150 if junjie_api_xx > 100 & junjie_api_xx <= 150;
replace b_l = .151 if junjie_api_xx > 100 & junjie_api_xx <= 150;
replace b_h = .35 if junjie_api_xx > 100 & junjie_api_xx <= 150;
replace i_l = 151 if junjie_api_xx > 150 & junjie_api_xx <= 200;
replace i_h = 200 if junjie_api_xx > 150 & junjie_api_xx <= 200;
replace b_l = .351 if junjie_api_xx > 150 & junjie_api_xx <= 200;
replace b_h = .42 if junjie_api_xx > 150 & junjie_api_xx <= 200;
replace i_l = 201 if junjie_api_xx > 200 & junjie_api_xx <= 300;
replace i_h = 300 if junjie_api_xx > 200 & junjie_api_xx <= 300;
replace b_l = .421 if junjie_api_xx > 200 & junjie_api_xx <= 300;
replace b_h = .5 if junjie_api_xx > 200 & junjie_api_xx <= 300;
replace i_l = 301 if junjie_api_xx > 300 & junjie_api_xx <= 400;
replace i_h = 400 if junjie_api_xx > 300 & junjie_api_xx <= 400;
replace b_l = .501 if junjie_api_xx > 300 & junjie_api_xx <= 400;
replace b_h = .6 if junjie_api_xx > 300 & junjie_api_xx <= 400;
replace i_l = 401 if junjie_api_xx > 400 & junjie_api_xx <= 500;
replace i_h = 500 if junjie_api_xx > 400 & junjie_api_xx <= 500;
replace b_l = .601 if junjie_api_xx > 400 & junjie_api_xx <= 500;
replace b_h = .7 if junjie_api_xx > 400 & junjie_api_xx <= 500;
gen pm10 = ((junjie_api_xx - i_l)*((b_h - b_l)/(i_h - i_l)) + b_l)*1000;
label var pm10 "PM10 in ug/m3 (junjie_api_xx's original data, converted to concentration)";
drop i_l i_h b_l b_h;

***check***;
recode junjie_api_xx (0/50=1) (51/100=2) (101/150=3) (151/200=4) (201/300=5) (301/400=6) (401/500=7), gen(api);
table api, c(min pm10 max pm10 count pm10);

***mean NCDC temp***;
egen tmean = rmean(tmax_fah tmin_fah);
compare tmean temp;
corr tmean temp;
corr precipitation prcp;

***dow & month dummies***;
gen month = month(Date);
tab month, gen(monthdum);
gen dow = dow(Date);
tab dow, gen(dowdum);

***calculate RH from dewp***;
gen dewpC = (dewp - 32)/1.8;
gen tmeanC = (temp - 32)/1.8;
gen E = 6.11*10^(7.5*dewpC/(237.7+dewpC));
gen Es = 6.11*10^(7.5*tmeanC/(237.7+tmeanC));
gen rh = E/Es*100;

***calculate Bext***;
gen bext = (18.6*rh)/visib;

***API for PM10***;
******with humidity & visib linear******;
***regression with NCDC temp & precip***;
regress pm25 junjie_api_xx visib dewp wdsp frshtt precipitation tmean dowdum* monthdum*;
predict pm25h_api1;
replace pm25h_api1 = pm25 if pm25 ~= .;
label var pm25h_api1 "predicted PM25 using API and NCDC temp/precip";

***regression with Shuang temp & precip***;
regress pm25 junjie_api_xx visib dewp wdsp frshtt prcp temp dowdum* monthdum*;
predict pm25h_api2;
replace pm25h_api2 = pm25 if pm25 ~= .;
label var pm25h_api2 "predicted PM25 using API and Shuang's temp/precip";

******with Bext******;
***regression with NCDC temp & precip***;
regress pm25 junjie_api_xx bext wdsp frshtt precipitation tmean dowdum* monthdum*;

***regression with Shuang temp & precip***;
regress pm25 junjie_api_xx bext wdsp frshtt prcp temp dowdum* monthdum*;

***regression with Shuang temp & precip - minimal regression***;
regress pm25 junjie_api_xx bext monthdum*;
regress twitter_aqi junjie_api_xx bext monthdum*;

***concentration for PM10***;
******with humidity & visib linear******;
***regression with NCDC temp & precip***;
regress pm25 pm10 visib dewp wdsp frshtt precipitation tmean dowdum* monthdum*;
predict pm25h_con1;
replace pm25h_con1 = pm25 if pm25 ~= .;
label var pm25h_con1 "predicted PM25 using PM10 and NCDC temp/precip";

***regression with Shuang temp & precip***;
regress pm25 pm10 visib dewp wdsp frshtt prcp temp dowdum* monthdum*;
predict pm25h_con2;
replace pm25h_con2 = pm25 if pm25 ~= .;
label var pm25h_con2 "predicted PM25 using PM10 and Shuang's temp/precip";

******with Bext******;
***regression with NCDC temp & precip***;
regress pm25 pm10 bext wdsp frshtt precipitation tmean dowdum* monthdum*;

***regression with Shuang temp & precip***;
regress pm25 pm10 bext wdsp frshtt prcp temp dowdum* monthdum*;

***regression with Shuang temp & precip - minimal regression***;
regress pm25 pm10 bext monthdum*;

***double check***;
summ pm25*;
summ pm25* if pm25 ~= .;

************************************************************
**   Save & Close
************************************************************;


***final vars***;
keep Date junjie_api_xx pm10 pm25 pm25h_con2;
ren pm25h_con2 pm25_imp;
label var pm25_imp "imputed PM2.5 in ug/m3, based on pm10, visibility, etc.";
label var junjie_api_xx "original API data from junjie_api_xx";
order Date junjie_api_xx pm10;

compress;
save ../dta/imputed-pollution-measures.dta , replace;




log close;
exit;

